<--- %%NOBANNER%% --> compare.sas.sas
 BackForward

/*-------------------------------------------------------------------------------\
| SOURCE:   compare.sas                                                          |
|                                                                                |
| VERSION:  RDS v3.0                                                             |
|           Part of the SPIKEware Rapid Decision Support SAS Macro Package       |
|           for SAS v8.x and above                                               |
|                                                                                |
| PURPOSE:  This macro is used to compare multiple datasets and check for any    |
|           similar variable names.  It also does a comparison of the number of  |
|           observations and prints a list of discrepencies of variable names    |
|           and variable types.                                                  |
|                                                                                |
| SYNTAX:   %compare (indata = _SAS_Dataset_names_)                    |
\-------------------------------------------------------------------------------*/
%macro compare (indata=) /des='compares multiple datasets' ;
%let notes=%sysfunc(getoption(notes, keyword)) ;
options nonotes ;
%local indata i merge numdata data obslst ndata mean min max notes ;
%let starttime = %sysfunc(datetime()) ;

/*---------------------------------\
| calculate variables and defaults |
\---------------------------------*/
%let indata = %cmpres(&indata) ;
%let numdata  = %words(&indata) ;
%let merge = %str() ;
%let ndata = %str() ;

/*------------------------------------------------\
| print error message if fewer than two datasets, |
| greater than 9,999,999 datasets, or complete    |
| individual checks if criteria is met.           |
\------------------------------------------------*/
%if &numdata < 2 %then %do ;
   data _null_ ;
      put 'ERROR:  Utility macro %COMPARE requires two or more input datasets.' ;
      put 'ERROR:  The macro will stop executing.' ;
   run ;
   %goto the_end ;
%end ;
%else %if &numdata > 9999999 %then %do ;
   data _null_ ;
      put 'ERROR:  Utility macro %COMPARE allows a maximum of 9,999,999 datasets.' ;
      put 'ERROR:  The macro will stop executing.' ;
   run ;
   %goto the_end ;
%end ;
%else %do i = 1 %to &numdata ;
   /*------------------------------------------------------\
   | the %do loop uses the %scan function to parse out each|
   | individual dataset name from the variable &indata.    |
   \------------------------------------------------------*/
   %let data = %data(%scan(&indata, &i, %str( ))) ;

   /*------------------------------------------------------------\
   | proc contents outputs variables to c&i for check with other |
   | datasets from input string.  note:  cannot check more than  |
   | 9,999,999 datasets (just in case someone actually tries!).  |
   \------------------------------------------------------------*/
   proc contents data=&data noprint 
                 out=c&i(keep   = name format label length nobs type
                         rename = (nobs=nobs&i format=f&i type=t&i length=l&i));
   run;

   /*-----------------------------\
   | create list of contents data |
   \-----------------------------*/
   %let merge  = &merge c&i (in=c&i);

   /*--------------------------------------------------\
   | obslist is the list of variables of the number of |
   | observations.  later in the program, the mean,    |
   | min, and max observations will be calculated.     |
   | the making of this list is broken into two parts: |
   | the first ends in a comma, and the last one does  |
   | not.  this allows the use of the min, mean, and   |
   | max functions later.                              |
   \--------------------------------------------------*/
   %if &i ne &numdata %then %let obslst = &obslst nobs&i,;
   %else %let obslst = &obslst nobs&i;

   %let ndata = %trim(%left(&ndata &data));
%end;

%let indata = &ndata;

/*----------------------------------------------------------\
| this datastep completes the comparison process by merging |
| the data together.                                        |
\----------------------------------------------------------*/
data all (drop=nobs1-nobs&numdata min_obs max_obs mean_obs mtch err_lnt err_typ err_fmt ehold);
   merge &merge ;
   by name ;
   length count $&numdata.. ;
   label length   = 'length'
         format   = 'format'
         name     = 'name'
         match    = 'variable match'
         label    = 'description'
         count    = '1 = yes*0 =  no*<-- scroll check -->' ;
   format min_obs max_obs mean_obs comma16. ;
   array there {&numdata} c1-c&numdata ;
   array l {&numdata} l1-l&numdata ;
   array t {&numdata} t1-t&numdata ;
   array f {&numdata} f1-f&numdata ;
   match = 'good       ' ;
   do i = 1 to &numdata ;
      ehold = 'error = ' ;
      substr(count,i,1) = put(there{i},1.) ;
      if substr(count,i,1) = 0 then match = ehold||'m' ;
      else do ;
         mtch = ' ' ;
         if l1 ne l{&numdata} then do ;
            mtch = 'e' ;
            err_lnt = 'l' ;
         end ;
         if t1 ne t{&numdata} then do ;
            mtch = 'e' ;
            err_typ = 't' ;
         end ;
         if f1 ne f{&numdata} then do ;
            mtch = 'e' ;
            err_fmt = 'f' ;
         end ;
         if mtch = 'e' then match = ehold||err_lnt||err_typ
                                    ||err_fmt ;
      end ;
   end ;

   length = l1 ;
   format = l1 ;

   if t1 = 1 then type = 'num  ' ;
   else if t1 = 2 then type = 'char ' ;
   else type = 'other' ;
   if _n_ = 1 then do ;
      min_obs  = min(&obslst);
      max_obs  = max(&obslst);
      mean_obs = round(mean(&obslst), .1) ;
      call symput ("min", min_obs) ;
      call symput ("max", max_obs) ;
      call symput ("mean", mean_obs) ;
   end ;
run ;

%let min  = &min ;
%let max  = &max ;
%let mean = &mean ;

/*-------------\
| print output |
\-------------*/
title1 "comparison of sas datasets" ;
title2 "&indata" ;
title4 "min obs = &min      mean obs = &mean      max obs = &max" ;
proc print data=all label split='*'; var name count label length format type match; run;

title4 "correct match of data?" ;
proc freq data=all; tables match /missing; run;

/*----------------------------------\
| prepare sas session for open code |
\----------------------------------*/
proc datasets nolist memtype=data library=work;
   delete all %_prefix(c, 1 to &numdata);
run; quit;

%the_end:;
options nonotes;
%timenote (macro=checkrec, starttime=&starttime)
%put;
options ¬es;
%mend compare;